Modeling workflow for Barro Colorado Islands
Analysis from Leite et al. 2022
This script is part of the repository of analysis from Leite et al. 2022. Major axe of variation in tree demography across global forests.
In this section, we will work with Barro Colorado Island (BCI) data as an example of the modeling pipeline used in the analysis of the 21 ForestGEO plots.
For data cleaning/wrangling, please see the previous script
1_dataPreparation.Rmd.
For the example analyses, we used the 100 x 100 m (1ha) quadrat grain size, which is the fastest scale for models to run.
No-time/reduced models
For the example of models without temporal organizing principles (reduced models), we use the last cenus interval (7).
1. Growth reduced model
Loading the cleaned data.
df <- readRDS(here("workflow_example", "bci_cleandata", "growth.rds"))
dad <- bind_rows(df, .id = "time")
data <- dad[dad$time == 7, ]
data$quadrat <- data[,colnames(data) == "quad_100"]Models parameters.
delta = 0.95
ncores = 3 # number of chains
iter = 3000 # number of iterations per chain
warmup = 1000 # number of burning iterations discarded per chain
thin = 5 # thinning intervalModel.
gm <- brm(g.dbh ~ 1 + (1|quadrat) + (1|sp) + (1|quadrat:sp),
data=data,
chains = ncores,
control = list(adapt_delta = delta),
cores = ncores,
iter = iter, warmup=warmup,
thin = thin,
seed=T)
save(gm, file = here("workflow_example", "bci_models_outputs","no_time_models",
"grow", "bci-7-quad_100-model.Rdata"))It took more than 9 hours to run the model in a high performance
cluster with parallel chains. I do not recommend you to run it in your
personal computer. The model is already saved in the
bci_models_output folder as an .Rdata file (>320MB):
load(here("workflow_example", "bci_models_outputs","no_time_models",
"grow", "bci-7-quad_100-model.Rdata"))Warning: If you cloned this repository originally from GitHub, and the chunk above doesn’t work, it is probably because you need to instal git large files storage (LFS) before cloning it in order to download the model’s files correctly.
Summary table with posterior medians.
gmsum <- summary(gm, robust=T) # median posterior
gmsum## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: g.dbh ~ 1 + (1 | quadrat) + (1 | sp) + (1 | quadrat:sp)
## Data: data (Number of observations: 170852)
## Draws: 3 chains, each with iter = 3000; warmup = 1000; thin = 5;
## total post-warmup draws = 1200
##
## Group-Level Effects:
## ~quadrat (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.16 0.02 0.13 0.21 1.00 805 969
##
## ~quadrat:sp (Number of levels: 7552)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.38 0.01 0.36 0.40 1.00 657 848
##
## ~sp (Number of levels: 274)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.97 0.05 0.88 1.08 1.01 307 507
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.43 0.07 1.32 1.56 1.00 247 499
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.23 0.00 1.23 1.24 1.00 1191 951
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Traceplots for the posterior distribution of parameters and chains mixture evaluation. Also saved as pdf A4 files.
pdf(file=here("workflow_example", "bci_models_outputs","no_time_models",
"grow", "bci-7-quad_100-traceplot.pdf"),
paper="a4", height = 20, width=10, onefile=F)
plot(gm, ask=F, N=5)
dev.off()## quartz_off_screen
## 2
plot(gm, ask=F, N=5)Table of model results.
fix <- as.data.frame(gmsum$fixed)
res <- as.data.frame(gmsum$spec_pars)
quadrat <- as.data.frame(gmsum$random$quadrat)
sp <- as.data.frame(gmsum$random$sp)
quadrat.sp <- as.data.frame(gmsum$random$`quadrat:sp`)
result <- bind_rows(list(intercept = fix,
quadrat = quadrat,
sp = sp,
`quadrat:sp` = quadrat.sp,
Residual = res), .id="term")
rownames(result) <- NULL
result$levels = c(NA, gmsum$ngrps$quadrat[1],
gmsum$ngrps$sp[1],
gmsum$ngrps$`quadrat:sp`[1],
gmsum$nobs)
result$variance <- result$Estimate^2
sdtab <- data.frame(data = "all",
fplot = "bci",
time = "7",
q_size = "quad_100",
ntrees = gmsum$nobs,
richness = gmsum$ngrps$sp[1])
sdtab <- sdtab[rep(seq_len(nrow(sdtab)), each = 5), ]
rownames(sdtab) <- NULL
res <- cbind(sdtab, result)
kable(res, digits = 2)| data | fplot | time | q_size | ntrees | richness | term | Estimate | Est.Error | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | levels | variance |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| all | bci | 7 | quad_100 | 170852 | 274 | intercept | 1.43 | 0.07 | 1.32 | 1.56 | 1.00 | 246.80 | 499.45 | NA | 2.05 |
| all | bci | 7 | quad_100 | 170852 | 274 | quadrat | 0.16 | 0.02 | 0.13 | 0.21 | 1.00 | 805.39 | 968.54 | 50 | 0.03 |
| all | bci | 7 | quad_100 | 170852 | 274 | sp | 0.97 | 0.05 | 0.88 | 1.08 | 1.01 | 307.27 | 506.59 | 274 | 0.94 |
| all | bci | 7 | quad_100 | 170852 | 274 | quadrat:sp | 0.38 | 0.01 | 0.36 | 0.40 | 1.00 | 657.50 | 848.22 | 7552 | 0.15 |
| all | bci | 7 | quad_100 | 170852 | 274 | Residual | 1.23 | 0.00 | 1.23 | 1.24 | 1.00 | 1191.45 | 951.31 | 170852 | 1.52 |
# saving table ------------
save(res, file=here("workflow_example", "bci_models_outputs","no_time_models",
"grow", "bci-7-quad_100-table.Rdata"))2. Mortality reduced model
Loading the cleaned data.
df <- readRDS(here("workflow_example", "bci_cleandata", "mortality.rds"))
dad <- bind_rows(df, .id = "time")
data <- dad[dad$time == 7, ]
data$quadrat <- data[,colnames(data) == "quad_100"]Models parameters.
delta=0.95
ncores = 3 # number of chains
iter = 3000 # number of iterations per chain
warmup = 1000 # number of burning iterations discarded per chain
thin = 5 # thinning intervalModel.
gm <- brm(dead ~ 1 + (1|quadrat) + (1|sp) + (1|quadrat:sp) +
offset(log(y.interval)),
family = bernoulli(link="cloglog"),
data = data,
chains = ncores,
control = list(adapt_delta = delta),
cores = ncores,
iter = iter, warmup=warmup,
thin = thin)
save(gm, file = here("workflow_example", "bci_models_outputs","no_time_models",
"mort", "bci-7-quad_100-model.Rdata"))The model is already saved in the bci_models_output
folder.
load(here("workflow_example", "bci_models_outputs", "no_time_models",
"mort", "bci-7-quad_100-model.Rdata"))Summary table with posterior medians.
gmsum <- summary(gm, robust=T) # median posterior
gmsum## Family: bernoulli
## Links: mu = cloglog
## Formula: dead ~ 1 + (1 | quadrat) + (1 | sp) + (1 | quadrat:sp) + offset(log(y.interval))
## Data: data (Number of observations: 204617)
## Draws: 3 chains, each with iter = 3000; warmup = 1000; thin = 5;
## total post-warmup draws = 1200
##
## Group-Level Effects:
## ~quadrat (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.11 0.01 0.08 0.14 1.00 1120 1092
##
## ~quadrat:sp (Number of levels: 8203)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.23 0.01 0.21 0.26 1.00 1001 1131
##
## ~sp (Number of levels: 289)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.99 0.05 0.90 1.10 1.00 640 858
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.60 0.07 -3.72 -3.45 1.01 299 506
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Traceplots for the posterior distribution of parameters and chains mixture evaluation. Also saved as pdf A4 files.
pdf(file=here("workflow_example", "bci_models_outputs","no_time_models",
"mort", "bci-7-quad_100-traceplot.pdf"),
paper="a4", height = 20, width=10, onefile=F)
plot(gm, N=4, ask=F)
dev.off()## quartz_off_screen
## 2
plot(gm, ask=F, N=4)Results table.
fix <- as.data.frame(gmsum$fixed)
res <- as.data.frame(gmsum$spec_pars)
res[1,] <- c(sqrt((pi^2)/6), rep(NA,6))
quadrat <- as.data.frame(gmsum$random$quadrat)
sp <- as.data.frame(gmsum$random$sp)
quadrat.sp <- as.data.frame(gmsum$random$`quadrat:sp`)
result <- bind_rows(list(intercept = fix,
quadrat = quadrat,
sp = sp,
`quadrat:sp` = quadrat.sp,
Residual = res), .id="term")
rownames(result) <- NULL
result$levels = c(NA, gmsum$ngrps$quadrat[1],
gmsum$ngrps$sp[1],
gmsum$ngrps$`quadrat:sp`[1],
gmsum$nobs)
result$variance <- result$Estimate^2
sdtab <- data.frame(data = "all",
fplot = "bci",
time = "7",
q_size = "quad_100",
ntrees = gmsum$nobs,
richness = gmsum$ngrps$sp[1])
sdtab <- sdtab[rep(seq_len(nrow(sdtab)), each = 5), ]
rownames(sdtab) <- NULL
res <- cbind(sdtab, result)
kable(res, digits = 2)| data | fplot | time | q_size | ntrees | richness | term | Estimate | Est.Error | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | levels | variance |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| all | bci | 7 | quad_100 | 204617 | 289 | intercept | -3.60 | 0.07 | -3.72 | -3.45 | 1.01 | 298.95 | 506.39 | NA | 12.93 |
| all | bci | 7 | quad_100 | 204617 | 289 | quadrat | 0.11 | 0.01 | 0.08 | 0.14 | 1.00 | 1120.30 | 1092.06 | 50 | 0.01 |
| all | bci | 7 | quad_100 | 204617 | 289 | sp | 0.99 | 0.05 | 0.90 | 1.10 | 1.00 | 639.92 | 857.78 | 289 | 0.99 |
| all | bci | 7 | quad_100 | 204617 | 289 | quadrat:sp | 0.23 | 0.01 | 0.21 | 0.26 | 1.00 | 1000.95 | 1130.83 | 8203 | 0.05 |
| all | bci | 7 | quad_100 | 204617 | 289 | Residual | 1.28 | NA | NA | NA | NA | NA | NA | 204617 | 1.64 |
# saving table ------------
save(res, file=here("workflow_example", "bci_models_outputs","no_time_models",
"mort", "bci-7-quad_100-table.Rdata"))3. Recruitment reduced model
Loading the cleaned data.
df <- readRDS(here("workflow_example", "bci_cleandata", "recruitment.rds"))
dad <- bind_rows(df, .id = "time")
data <- dad[dad$time == 7, ]
data$quadrat <- data[,colnames(data) == "quad_100"]Models parameters.
delta=0.95
ncores = 3 # number of chains
iter = 3000 # number of iterations per chain
warmup = 1000 # number of burning iterations discarded per chain
thin = 5 # thinning intervalModel.
gm <- brm(dead ~ 1 + (1|quadrat) + (1|sp) + (1|quadrat:sp) +
offset(log(y.interval)),
family = bernoulli(link="cloglog"),
data = data,
chains = ncores,
control = list(adapt_delta = delta),
cores = ncores,
iter = iter, warmup=warmup,
thin = thin)
save(gm, file = here("workflow_example", "bci_models_outputs","no_time_models",
"rec", "bci-7-quad_100-model.Rdata"))The model is already saved in the bci_models_output
folder.
load(here("workflow_example", "bci_models_outputs","no_time_models",
"rec", "bci-7-quad_100-model.Rdata"))Summary table with posterior medians.
gmsum <- summary(gm, robust=T) # median posterior
gmsum## Family: bernoulli
## Links: mu = cloglog
## Formula: rec ~ 1 + (1 | quadrat) + (1 | sp) + (1 | quadrat:sp) + offset(log(y.interval))
## Data: data (Number of observations: 218635)
## Draws: 3 chains, each with iter = 3000; warmup = 1000; thin = 5;
## total post-warmup draws = 1200
##
## Group-Level Effects:
## ~quadrat (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.24 0.03 0.20 0.31 1.00 948 1061
##
## ~quadrat:sp (Number of levels: 8320)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.45 0.01 0.42 0.47 1.00 944 999
##
## ~sp (Number of levels: 289)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.07 0.05 0.97 1.19 1.00 499 769
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.56 0.08 -3.72 -3.41 1.00 267 470
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Traceplots for the posterior distribution of parameters and chains mixture evaluation. Also saved as pdf A4 files.
pdf(file=here("workflow_example", "bci_models_outputs","no_time_models",
"rec", "bci-7-quad_100-traceplot.pdf"),
paper="a4", height = 20, width=10, onefile=FALSE)
plot(gm)
dev.off()## quartz_off_screen
## 2
plot(gm, ask=F, N=4)Table of model results.
fix <- as.data.frame(gmsum$fixed)
res <- as.data.frame(gmsum$spec_pars)
res[1,] <- c(sqrt((pi^2)/6), rep(NA,6))
quadrat <- as.data.frame(gmsum$random$quadrat)
sp <- as.data.frame(gmsum$random$sp)
quadrat.sp <- as.data.frame(gmsum$random$`quadrat:sp`)
result <- bind_rows(list(intercept = fix,
quadrat = quadrat,
sp = sp,
`quadrat:sp` = quadrat.sp,
Residual = res), .id="term")
rownames(result) <- NULL
result$levels = c(NA, gmsum$ngrps$quadrat[1],
gmsum$ngrps$sp[1],
gmsum$ngrps$`quadrat:sp`[1],
gmsum$nobs)
result$variance <- result$Estimate^2
sdtab <- data.frame(data = "all",
fplot = "bci",
time = "7",
q_size = "quad_100",
ntrees = gmsum$nobs,
richness = gmsum$ngrps$sp[1])
sdtab <- sdtab[rep(seq_len(nrow(sdtab)), each = 5), ]
rownames(sdtab) <- NULL
res <- cbind(sdtab, result)
kable(res, digits = 2)| data | fplot | time | q_size | ntrees | richness | term | Estimate | Est.Error | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | levels | variance |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| all | bci | 7 | quad_100 | 218635 | 289 | intercept | -3.56 | 0.08 | -3.72 | -3.41 | 1 | 267.38 | 470.15 | NA | 12.71 |
| all | bci | 7 | quad_100 | 218635 | 289 | quadrat | 0.24 | 0.03 | 0.20 | 0.31 | 1 | 948.10 | 1061.23 | 50 | 0.06 |
| all | bci | 7 | quad_100 | 218635 | 289 | sp | 1.07 | 0.05 | 0.97 | 1.19 | 1 | 499.02 | 768.72 | 289 | 1.15 |
| all | bci | 7 | quad_100 | 218635 | 289 | quadrat:sp | 0.45 | 0.01 | 0.42 | 0.47 | 1 | 944.06 | 999.04 | 8320 | 0.20 |
| all | bci | 7 | quad_100 | 218635 | 289 | Residual | 1.28 | NA | NA | NA | NA | NA | NA | 218635 | 1.64 |
# saving table ------------
save(res, file=here("workflow_example", "bci_models_outputs","no_time_models",
"rec", "bci-7-quad_100-table.Rdata"))Time/complete models
Here, we show as example only one of the 10 subsampled data models.
Loading the code for the first random 5-1ha quadrats:
load(here("data", "samples_5ha.Rdata"))
samples$bci$bci_1 # quadrats## [1] "700_400" "900_400" "700_300" "600_300" "100_200"
1. Growth time model
Loading the cleaned data.
df <- readRDS(here("workflow_example", "bci_cleandata", "growth.rds"))
data <- bind_rows(df, .id = "time")
data$quadrat <- data[,colnames(data) == "quad_100"]For growth in BCI, we excluded the first census interval given measurement problems in the first census.
data <- data %>% filter(time != "1")Taking the subsample.
data <- data[data$quad_100 %in% samples$bci$bci_1, ]Models parameters
delta=0.95
ncores = 3 # number of chains
iter = 3000 # number of iterations per chain
warmup = 1000 # number of burning iterations discarded per chain
thin = 5 # thinning intervalModel.
gm <- brm(g.dbh ~ 1 + (1|quadrat) + (1|sp) + (1|time) +
(1|quadrat:sp) + (1|quadrat:time) + (1|sp:time),
data=data,
chains = ncores,
control = list(adapt_delta = delta),
cores = ncores,
iter = iter, warmup=warmup,
thin = thin,
save_pars = save_pars(group=FALSE),
seed=T)
save(gm, file = here("workflow_example", "bci_models_outputs","time_models",
"grow", "bci-quad_100-1-model.Rdata"))This model (as all the time models) takes more than 4 days to run in
a high performance cluster. I don’t advise you to run it in your
personal computer. The model is already saved in the
bci_models_output folder.
load(here("workflow_example", "bci_models_outputs","time_models",
"grow", "bci-quad_100-1-model.Rdata"))Summary table with posterior medians.
gmsum <- summary(gm, robust=T) # median posterior
gmsum## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: g.dbh ~ 1 + (1 | quadrat) + (1 | sp) + (1 | time) + (1 | quadrat:sp) + (1 | quadrat:time) + (1 | sp:time)
## Data: data (Number of observations: 112319)
## Draws: 3 chains, each with iter = 3000; warmup = 1000; thin = 5;
## total post-warmup draws = 1200
##
## Group-Level Effects:
## ~quadrat (Number of levels: 5)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.08 0.06 0.01 0.30 1.00 1108 1153
##
## ~quadrat:sp (Number of levels: 868)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.33 0.02 0.29 0.37 1.00 935 1061
##
## ~quadrat:time (Number of levels: 30)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.12 0.02 0.09 0.18 1.00 1138 1011
##
## ~sp (Number of levels: 230)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.93 0.05 0.83 1.04 1.00 1041 1074
##
## ~sp:time (Number of levels: 1273)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.18 0.01 0.16 0.21 1.00 900 1048
##
## ~time (Number of levels: 6)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.31 0.11 0.17 0.77 1.00 1150 1059
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.25 0.16 0.89 1.62 1.00 1224 1169
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.21 0.00 1.21 1.22 1.00 1248 1165
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Traceplots for the posterior distribution of parameters and chains mixture evaluation. Also saved as pdf A4 files.
pdf(file=here("workflow_example", "bci_models_outputs","time_models",
"grow", "bci-quad_100-1-traceplot.pdf"),
paper="a4", height = 20, width=10,
onefile = T)
plot(gm, ask=F, N=4)
dev.off()plot(gm, ask=F, N=4)Table of model results.
fix <- as.data.frame(gmsum$fixed)
res <- as.data.frame(gmsum$spec_pars)
quadrat <- as.data.frame(gmsum$random$quadrat)
sp <- as.data.frame(gmsum$random$sp)
time <- as.data.frame(gmsum$random$time)
quadrat.sp <- as.data.frame(gmsum$random$`quadrat:sp`)
quadrat.time <- as.data.frame(gmsum$random$`quadrat:time`)
sp.time <- as.data.frame(gmsum$random$`sp:time`)
result <- bind_rows(list(intercept = fix,
quadrat = quadrat,
sp = sp,
time = time,
`quadrat:sp` = quadrat.sp,
`quadrat:time` = quadrat.time,
`sp:time` = sp.time,
Residual = res), .id="term")
rownames(result) <- NULL
result$levels = c(NA, gmsum$ngrps$quadrat[1],
gmsum$ngrps$sp[1],
gmsum$ngrps$time[1],
gmsum$ngrps$`quadrat:sp`[1],
gmsum$ngrps$`quadrat:time`[1],
gmsum$ngrps$`sp:time`[1],
gmsum$nobs)
result$variance <- result$Estimate^2
sdtab <- data.frame(sub = "1",
data = "all",
fplot = "bci",
q_size = "quad_100",
ntrees = gmsum$nobs,
richness = gmsum$ngrps$sp[1])
sdtab <- sdtab[rep(seq_len(nrow(sdtab)), each = 8), ]
rownames(sdtab) <- NULL
res <- cbind(sdtab, result)
kable(res, digits = 2)| sub | data | fplot | q_size | ntrees | richness | term | Estimate | Est.Error | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | levels | variance |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | all | bci | quad_100 | 112319 | 230 | intercept | 1.25 | 0.16 | 0.89 | 1.62 | 1 | 1223.62 | 1168.62 | NA | 1.57 |
| 1 | all | bci | quad_100 | 112319 | 230 | quadrat | 0.08 | 0.06 | 0.01 | 0.30 | 1 | 1107.59 | 1152.64 | 5 | 0.01 |
| 1 | all | bci | quad_100 | 112319 | 230 | sp | 0.93 | 0.05 | 0.83 | 1.04 | 1 | 1040.95 | 1074.23 | 230 | 0.87 |
| 1 | all | bci | quad_100 | 112319 | 230 | time | 0.31 | 0.11 | 0.17 | 0.77 | 1 | 1149.71 | 1058.80 | 6 | 0.10 |
| 1 | all | bci | quad_100 | 112319 | 230 | quadrat:sp | 0.33 | 0.02 | 0.29 | 0.37 | 1 | 934.75 | 1060.70 | 868 | 0.11 |
| 1 | all | bci | quad_100 | 112319 | 230 | quadrat:time | 0.12 | 0.02 | 0.09 | 0.18 | 1 | 1137.98 | 1010.96 | 30 | 0.02 |
| 1 | all | bci | quad_100 | 112319 | 230 | sp:time | 0.18 | 0.01 | 0.16 | 0.21 | 1 | 899.76 | 1048.09 | 1273 | 0.03 |
| 1 | all | bci | quad_100 | 112319 | 230 | Residual | 1.21 | 0.00 | 1.21 | 1.22 | 1 | 1247.88 | 1165.33 | 112319 | 1.47 |
# saving table ------------
save(res, file=here("workflow_example", "bci_models_outputs","time_models",
"grow", "bci-quad_100-1-table.Rdata"))2. Mortality time model
Loading the cleaned data.
df <- readRDS(here("workflow_example", "bci_cleandata", "mortality.rds"))
data <- bind_rows(df, .id = "time")
data$quadrat <- data[,colnames(data) == "quad_100"]Including column for the time interval between tree measurements, in years.
data$y.interval <- as.numeric(data$interval/365) # interval yearsTaking the subsample 1.
data <- data[data$quad_100 %in% samples$bci$bci_1, ]Models parameters.
delta=0.95
ncores = 3 # number of chains
iter = 3000 # number of iterations per chain
warmup = 1000 # number of burning iterations discarded per chain
thin = 5 # thinning intervalModel.
gm <- brm(dead ~ 1 + (1|quadrat) + (1|sp) + (1|time) +
(1|quadrat:sp) + (1|quadrat:time) + (1|sp:time) +
offset(log(y.interval)),
family = bernoulli(link="cloglog"),
data=data,
chains = ncores,
control = list(adapt_delta = delta),
cores = ncores,
iter = iter, warmup=warmup,
thin = thin,
seed=T,
save_pars = save_pars(group=FALSE),
inits="0")
save(gm, file = here("workflow_example", "bci_models_outputs","time_models",
"mort", "bci-quad_100-1-model.Rdata"))The model is already saved in the bci_models_output
folder.
load(here("workflow_example", "bci_models_outputs","time_models",
"mort", "bci-quad_100-1-model.Rdata"))Summary table with posterior medians.
gmsum <- summary(gm, robust=T) # median posterior
gmsum## Family: bernoulli
## Links: mu = cloglog
## Formula: dead ~ 1 + (1 | quadrat) + (1 | sp) + (1 | time) + (1 | quadrat:sp) + (1 | quadrat:time) + (1 | sp:time) + offset(log(y.interval))
## Data: data (Number of observations: 153641)
## Draws: 3 chains, each with iter = 3000; warmup = 1000; thin = 5;
## total post-warmup draws = 1200
##
## Group-Level Effects:
## ~quadrat (Number of levels: 5)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.17 0.08 0.07 0.50 1.00 968 1067
##
## ~quadrat:sp (Number of levels: 983)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.20 0.02 0.17 0.24 1.00 791 980
##
## ~quadrat:time (Number of levels: 35)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.14 0.02 0.10 0.20 1.00 961 1064
##
## ~sp (Number of levels: 263)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.99 0.06 0.88 1.11 1.00 682 1026
##
## ~sp:time (Number of levels: 1666)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.25 0.02 0.22 0.29 1.00 970 966
##
## ~time (Number of levels: 7)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.09 0.05 0.01 0.26 1.00 790 961
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.63 0.12 -3.91 -3.38 1.00 812 1004
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Traceplots for the posterior distribution of parameters and chains mixture evaluation. Also saved as pdf A4 files.
pdf(file=here("workflow_example", "bci_models_outputs","time_models",
"mort", "bci-quad_100-1-traceplot.pdf"),
paper="a4", height = 20, width=10,
onefile = T)
plot(gm, ask=F, N=4)
dev.off()plot(gm, ask=F, N=4)Table of model results.
fix <- as.data.frame(gmsum$fixed)
res <- as.data.frame(gmsum$fixed)
res[1,] <- c(sqrt((pi^2)/6), rep(NA,6))
quadrat <- as.data.frame(gmsum$random$quadrat)
sp <- as.data.frame(gmsum$random$sp)
time <- as.data.frame(gmsum$random$time)
quadrat.sp <- as.data.frame(gmsum$random$`quadrat:sp`)
quadrat.time <- as.data.frame(gmsum$random$`quadrat:time`)
sp.time <- as.data.frame(gmsum$random$`sp:time`)
result <- bind_rows(list(intercept = fix,
quadrat = quadrat,
sp = sp,
time = time,
`quadrat:sp` = quadrat.sp,
`quadrat:time` = quadrat.time,
`sp:time` = sp.time,
Residual = res), .id="term")
rownames(result) <- NULL
result$levels = c(NA, gmsum$ngrps$quadrat[1],
gmsum$ngrps$sp[1],
gmsum$ngrps$time[1],
gmsum$ngrps$`quadrat:sp`[1],
gmsum$ngrps$`quadrat:time`[1],
gmsum$ngrps$`sp:time`[1],
gmsum$nobs)
result$variance <- result$Estimate^2
sdtab <- data.frame(sub = "1",
data = "all",
fplot = "bci",
q_size = "quad_100",
ntrees = gmsum$nobs,
richness = gmsum$ngrps$sp[1])
sdtab <- sdtab[rep(seq_len(nrow(sdtab)), each = 8), ]
rownames(sdtab) <- NULL
res <- cbind(sdtab, result)
kable(res, digits = 2)| sub | data | fplot | q_size | ntrees | richness | term | Estimate | Est.Error | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | levels | variance |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | all | bci | quad_100 | 153641 | 263 | intercept | -3.63 | 0.12 | -3.91 | -3.38 | 1 | 811.96 | 1004.49 | NA | 13.21 |
| 1 | all | bci | quad_100 | 153641 | 263 | quadrat | 0.17 | 0.08 | 0.07 | 0.50 | 1 | 968.45 | 1067.14 | 5 | 0.03 |
| 1 | all | bci | quad_100 | 153641 | 263 | sp | 0.99 | 0.06 | 0.88 | 1.11 | 1 | 681.66 | 1025.66 | 263 | 0.98 |
| 1 | all | bci | quad_100 | 153641 | 263 | time | 0.09 | 0.05 | 0.01 | 0.26 | 1 | 790.46 | 961.33 | 7 | 0.01 |
| 1 | all | bci | quad_100 | 153641 | 263 | quadrat:sp | 0.20 | 0.02 | 0.17 | 0.24 | 1 | 791.22 | 979.64 | 983 | 0.04 |
| 1 | all | bci | quad_100 | 153641 | 263 | quadrat:time | 0.14 | 0.02 | 0.10 | 0.20 | 1 | 960.81 | 1064.26 | 35 | 0.02 |
| 1 | all | bci | quad_100 | 153641 | 263 | sp:time | 0.25 | 0.02 | 0.22 | 0.29 | 1 | 970.02 | 965.81 | 1666 | 0.06 |
| 1 | all | bci | quad_100 | 153641 | 263 | Residual | 1.28 | NA | NA | NA | NA | NA | NA | 153641 | 1.64 |
# saving table ------------
save(res, file=here("workflow_example", "bci_models_outputs","time_models",
"mort", "bci-quad_100-1-table.Rdata"))3. Recruitment time model
Loading the cleaned data.
df <- readRDS(here("workflow_example", "bci_cleandata", "recruitment.rds"))
data <- bind_rows(df, .id = "time")
data$quadrat <- data[,colnames(data) == "quad_100"]Including column for the quadrat mean time interval in years.
inter <- data %>% group_by(time, quadrat) %>%
summarise(y.interval = as.numeric(mean(interval, na.rm = T)/365))
# if interval is 0 or NaN, use mean of plot
inter$y.interval[is.nan(inter$y.interval) | inter$y.interval == 0] <-
mean(inter$y.interval, na.rm = T)Taking the subsample 1
data <- data[data$quad_100 %in% samples$bci$bci_1, ]Models parameters.
delta=0.95
ncores = 3 # number of chains
iter = 3000 # number of iterations per chain
warmup = 1000 # number of burning iterations discarded per chain
thin = 5 # thinning intervalModel.
gm <- brm(rec ~ 1 + (1|quadrat) + (1|sp) + (1|time) +
(1|quadrat:sp) + (1|quadrat:time) + (1|sp:time) +
offset(log(y.interval)),
family = bernoulli(link="cloglog"),
data=data,
chains = ncores,
control = list(adapt_delta = delta),
cores = ncores,
iter = iter, warmup=warmup,
thin = thin,
seed=T,
save_pars = save_pars(group=FALSE),
inits="0")
save(gm, file = here("workflow_example", "bci_models_outputs","time_models",
"rec", "bci-quad_100-1-model.Rdata"))The model is already saved in the bci_models_output
folder.
load(here("workflow_example", "bci_models_outputs","time_models",
"rec", "bci-quad_100-1-model.Rdata"))Summary table with posterior medians.
gmsum <- summary(gm, robust=T) # median posterior
gmsum## Family: bernoulli
## Links: mu = cloglog
## Formula: rec ~ 1 + (1 | quadrat) + (1 | sp) + (1 | time) + (1 | quadrat:sp) + (1 | quadrat:time) + (1 | sp:time) + offset(log(y.interval))
## Data: data (Number of observations: 159050)
## Draws: 3 chains, each with iter = 3000; warmup = 1000; thin = 5;
## total post-warmup draws = 1200
##
## Group-Level Effects:
## ~quadrat (Number of levels: 5)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.06 0.06 0.00 0.33 1.00 1108 1169
##
## ~quadrat:sp (Number of levels: 961)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.22 0.02 0.18 0.26 1.00 1062 1090
##
## ~quadrat:time (Number of levels: 35)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.30 0.04 0.23 0.41 1.00 1073 1029
##
## ~sp (Number of levels: 256)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.93 0.06 0.82 1.06 1.00 1117 970
##
## ~sp:time (Number of levels: 1588)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.42 0.02 0.39 0.46 1.00 1221 1086
##
## ~time (Number of levels: 7)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.34 0.13 0.16 0.80 1.00 971 813
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.73 0.16 -4.09 -3.34 1.00 1049 1001
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Traceplots for the posterior distribution of parameters and chains mixture evaluation. Also saved as pdf A4 files.
pdf(file=here("workflow_example", "bci_models_outputs","time_models",
"rec", "bci-quad_100-1-traceplot.pdf"),
paper="a4", height = 20, width=10,
onefile = T)
plot(gm, ask=F, N=4)
dev.off()plot(gm, ask=F, N=4)Table of model results.
fix <- as.data.frame(gmsum$fixed)
res <- as.data.frame(gmsum$fixed)
res[1,] <- c(sqrt((pi^2)/6), rep(NA,6))
quadrat <- as.data.frame(gmsum$random$quadrat)
sp <- as.data.frame(gmsum$random$sp)
time <- as.data.frame(gmsum$random$time)
quadrat.sp <- as.data.frame(gmsum$random$`quadrat:sp`)
quadrat.time <- as.data.frame(gmsum$random$`quadrat:time`)
sp.time <- as.data.frame(gmsum$random$`sp:time`)
result <- bind_rows(list(intercept = fix,
quadrat = quadrat,
sp = sp,
time = time,
`quadrat:sp` = quadrat.sp,
`quadrat:time` = quadrat.time,
`sp:time` = sp.time,
Residual = res), .id="term")
rownames(result) <- NULL
result$levels = c(NA, gmsum$ngrps$quadrat[1],
gmsum$ngrps$sp[1],
gmsum$ngrps$time[1],
gmsum$ngrps$`quadrat:sp`[1],
gmsum$ngrps$`quadrat:time`[1],
gmsum$ngrps$`sp:time`[1],
gmsum$nobs)
result$variance <- result$Estimate^2
sdtab <- data.frame(sub = "1",
data = "all",
fplot = "bci",
q_size = "quad_100",
ntrees = gmsum$nobs,
richness = gmsum$ngrps$sp[1])
sdtab <- sdtab[rep(seq_len(nrow(sdtab)), each = 8), ]
rownames(sdtab) <- NULL
res <- cbind(sdtab, result)
kable(res, digits = 2)| sub | data | fplot | q_size | ntrees | richness | term | Estimate | Est.Error | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | levels | variance |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | all | bci | quad_100 | 159050 | 256 | intercept | -3.73 | 0.16 | -4.09 | -3.34 | 1 | 1048.60 | 1001.49 | NA | 13.90 |
| 1 | all | bci | quad_100 | 159050 | 256 | quadrat | 0.06 | 0.06 | 0.00 | 0.33 | 1 | 1108.28 | 1169.34 | 5 | 0.00 |
| 1 | all | bci | quad_100 | 159050 | 256 | sp | 0.93 | 0.06 | 0.82 | 1.06 | 1 | 1117.29 | 970.27 | 256 | 0.86 |
| 1 | all | bci | quad_100 | 159050 | 256 | time | 0.34 | 0.13 | 0.16 | 0.80 | 1 | 971.47 | 812.75 | 7 | 0.12 |
| 1 | all | bci | quad_100 | 159050 | 256 | quadrat:sp | 0.22 | 0.02 | 0.18 | 0.26 | 1 | 1062.25 | 1090.16 | 961 | 0.05 |
| 1 | all | bci | quad_100 | 159050 | 256 | quadrat:time | 0.30 | 0.04 | 0.23 | 0.41 | 1 | 1072.95 | 1028.63 | 35 | 0.09 |
| 1 | all | bci | quad_100 | 159050 | 256 | sp:time | 0.42 | 0.02 | 0.39 | 0.46 | 1 | 1221.27 | 1086.23 | 1588 | 0.18 |
| 1 | all | bci | quad_100 | 159050 | 256 | Residual | 1.28 | NA | NA | NA | NA | NA | NA | 159050 | 1.64 |
# saving table ------------
save(res, file=here("workflow_example", "bci_models_outputs","time_models",
"rec", "bci-quad_100-1-table.Rdata"))